package crawler.bentian;

import java.io.IOException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.zql.entity.AgencyEntity;

import util.Location;
import util.MybatisTool;
import util.TianYanCha;
/* Known issue: the crawl returns data, but the run also reports the error below.
 * java.net.SocketTimeoutException: Read timed out
抓取完毕,正在存库
	at java.net.SocketInputStream.socketRead0(Native Method)
	at java.net.SocketInputStream.socketRead(Unknown Source)
	at java.net.SocketInputStream.read(Unknown Source)
	at java.net.SocketInputStream.read(Unknown Source)
	at java.io.BufferedInputStream.fill(Unknown Source)
	at java.io.BufferedInputStream.read1(Unknown Source)
	at java.io.BufferedInputStream.read(Unknown Source)
	at sun.net.www.http.HttpClient.parseHTTPHeader(Unknown Source)
	at sun.net.www.http.HttpClient.parseHTTP(Unknown Source)
	at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(Unknown Source)
	at sun.net.www.protocol.http.HttpURLConnection.getInputStream(Unknown Source)
	at java.net.HttpURLConnection.getResponseCode(Unknown Source)
	at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:656)
	at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:629)
	at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:261)
	at org.jsoup.helper.HttpConnection.get(HttpConnection.java:250)
	at crawler.bentian.DongFengBenTian.getAgency(DongFengBenTian.java:35)
	at crawler.bentian.DongFengBenTian.main(DongFengBenTian.java:85)*/
/**
 * Crawler that collects Dongfeng Honda dealer records from the dealer-locator
 * pages of www.dongfeng-honda.com and saves them through {@link MybatisTool}.
 *
 * <p>The crawl walks province -> city -> dealer list. Every HTTP request uses an
 * explicit read timeout and is retried a bounded number of times when it times
 * out. A request that still fails only skips the province or city it belongs to,
 * so everything collected so far is kept and the crawl continues.
 */
public class DongFengBenTian {
	/** Root of the dealer-locator site; every request URL is built from it. */
	private static final String BASE_URL = "http://www.dongfeng-honda.com";
	/** Connect/read timeout applied to every request, in milliseconds. */
	private static final int TIMEOUT_MILLIS = 30000;
	/** Maximum number of tries for a request that keeps timing out. */
	private static final int MAX_ATTEMPTS = 3;

	/**
	 * Crawls every province and city offered by the dealer-locator page and
	 * builds one {@link AgencyEntity} per dealer found.
	 *
	 * @return the dealers collected; never {@code null}, possibly empty or
	 *         partial when some requests failed (failures are reported on stderr)
	 */
	List<AgencyEntity> getAgency() {
		List<AgencyEntity> list = new ArrayList<AgencyEntity>();
		String proUrl = BASE_URL + "/dot_query.shtml";

		Elements proOptions;
		try {
			Element select = fetch(proUrl, false).getElementById("province");
			if (select == null) {
				System.err.println("No element with id 'province' found at " + proUrl);
				return list;
			}
			proOptions = select.getElementsByTag("option");
		} catch (IOException e) {
			System.err.println("Failed to load province list from " + proUrl);
			e.printStackTrace();
			return list;
		}

		// Index 0 is skipped - presumably the placeholder <option>; verify against the page.
		for (int i = 1; i < proOptions.size(); i++) {
			Element proOption = proOptions.get(i);
			String proNum = proOption.attr("province_id");
			String proName = proOption.text();
			String cityUrl = BASE_URL + "/index/get_city_bypid/" + proNum;

			Elements cityOptions;
			try {
				cityOptions = fetch(cityUrl, true).getElementsByTag("option");
			} catch (IOException e) {
				// Losing one province must not abort the remaining ones.
				System.err.println("Skipping province " + proName + ": cannot load " + cityUrl);
				e.printStackTrace();
				continue;
			}

			// Index 0 is skipped here as well, for the same (assumed) reason.
			for (int j = 1; j < cityOptions.size(); j++) {
				String cityName = cityOptions.get(j).text();
				try {
					collectDealers(proName, cityName, list);
				} catch (IOException e) {
					// Dealers already added for this city stay in the list.
					System.err.println("Skipping rest of city " + proName + "/" + cityName);
					e.printStackTrace();
				}
			}
		}
		return list;
	}

	/**
	 * Downloads the dealer list of one city and appends an entity per dealer.
	 *
	 * @param proName  province name as shown in the province drop-down
	 * @param cityName city name as shown in the city drop-down
	 * @param list     destination list; dealers are appended to it
	 * @throws IOException if the dealer page cannot be downloaded
	 */
	private static void collectDealers(String proName, String cityName, List<AgencyEntity> list)
			throws IOException {
		// NOTE(review): names are concatenated without URL-encoding, exactly as before;
		// which encoding the server expects could not be confirmed from this file.
		String dealerUrl = BASE_URL + "/dot_query.shtml?province=" + proName + "&city=" + cityName;
		Elements stores = fetch(dealerUrl, false).getElementsByClass("stores");
		if (stores.isEmpty()) {
			System.err.println("No 'stores' element for " + proName + "/" + cityName);
			return;
		}

		for (Element li : stores.get(0).getElementsByTag("li")) {
			String name = li.getElementsByTag("h4").text();
			Elements ps = li.getElementsByTag("p");
			// Three <p> tags are read below: address, sales phone, service phone.
			if (ps.size() < 3) {
				System.err.println("Skipping dealer with incomplete details: " + name);
				continue;
			}
			String address = ps.get(0).text();
			String sellTell = valueAfterColon(ps.get(1).text());
			String serviceTell = valueAfterColon(ps.get(2).text());

			// Defensive: the helpers' contracts are not visible here, so short or
			// null results are mapped to null fields instead of crashing the crawl.
			String[] lngAndLat = Location.getLocation(address);
			boolean hasCoordinates = lngAndLat != null && lngAndLat.length >= 2;
			List<String> shareholder = TianYanCha.getShareholder(name);
			int shareholderCount = shareholder == null ? 0 : shareholder.size();

			AgencyEntity agency = new AgencyEntity();
			agency.setdCloseDate(null);
			agency.setdOpeningDate(null);
			agency.setdUpdateTime(new Timestamp(System.currentTimeMillis()));
			agency.setnBrandID(-1);
			agency.setsBrand("本田");
			agency.setnDealerIDWeb(-1);
			agency.setnManufacturerID(-1);
			agency.setsManufacturer("东风本田");
			agency.setnState(1);
			agency.setsAddress(address);
			agency.setsCity(cityName);
			agency.setsCustomerServiceCall(serviceTell);
			agency.setsDealerName(name);
			agency.setsDealerType(null);
			agency.setsProvince(proName);
			agency.setsSaleCall(sellTell);

			agency.setsLongitude(hasCoordinates ? lngAndLat[0] : null);
			agency.setsLatitude(hasCoordinates ? lngAndLat[1] : null);
			agency.setsControllingShareholder(shareholderCount > 0 ? shareholder.get(0) : null);
			agency.setsOtherShareholders(shareholderCount > 1 ? shareholder.get(1) : null);

			list.add(agency);
		}
	}

	/**
	 * Returns the text that follows the first full-width colon of a
	 * "label：value" string.
	 *
	 * @param text text of a phone-number paragraph
	 * @return the second colon-separated part, or an empty string when the text
	 *         has no value after the colon
	 */
	private static String valueAfterColon(String text) {
		String[] parts = text.split("：");
		return parts.length > 1 ? parts[1] : "";
	}

	/**
	 * Downloads and parses one page, retrying when the request times out.
	 *
	 * @param url         absolute URL to request
	 * @param asCityQuery {@code true} to POST the city-lookup form fields,
	 *                    {@code false} for a plain GET
	 * @return the parsed document
	 * @throws IOException if the request fails, or still times out after
	 *                     {@link #MAX_ATTEMPTS} tries
	 */
	private static Document fetch(String url, boolean asCityQuery) throws IOException {
		java.net.SocketTimeoutException lastTimeout = null;
		for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
			try {
				if (asCityQuery) {
					return Jsoup.connect(url).timeout(TIMEOUT_MILLIS)
							.data("dealer_type", "dot_query").data("ajax", "true").post();
				}
				return Jsoup.connect(url).timeout(TIMEOUT_MILLIS).get();
			} catch (java.net.SocketTimeoutException e) {
				// Only timeouts are retried; other I/O errors propagate immediately.
				lastTimeout = e;
				System.err.println("Timed out (attempt " + attempt + "/" + MAX_ATTEMPTS + "): " + url);
			}
		}
		throw lastTimeout;
	}

	/**
	 * Runs the crawl and saves every collected dealer.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		DongFengBenTian crawler = new DongFengBenTian();
		System.out.println("爬虫开始...");
		List<AgencyEntity> agencys = crawler.getAgency();
		System.out.println("抓取完毕,正在存库");
		try {
			for (AgencyEntity agency : agencys) {
				MybatisTool.save(agency);
			}
		} finally {
			// Close even when a save fails part-way through.
			MybatisTool.close();
		}
		System.out.println("请查看数据库");
	}
}
